library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts -------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readr)
data <- read_delim("injurydata.csv",";", escape_double = FALSE, trim_ws = TRUE)
## Parsed with column specification:
## cols(
## .default = col_character(),
## XCoord = col_double(),
## YCoord = col_double(),
## OBJECTID = col_double(),
## ID = col_double(),
## UPA = col_double(),
## EVENTDATE = col_datetime(format = ""),
## PRIMARYNAICS = col_double(),
## HOSPITALIZED = col_double(),
## AMPUTATION = col_double(),
## NEAR_DIST = col_double(),
## NEAR_X = col_double(),
## NEAR_Y = col_double()
## )
## See spec(...) for full column specifications.
## Warning: 522 parsing failures.
## row col expected actual file
## 33 -- 36 columns 37 columns 'injurydata.csv'
## 130 -- 36 columns 37 columns 'injurydata.csv'
## 169 -- 36 columns 39 columns 'injurydata.csv'
## 237 -- 36 columns 37 columns 'injurydata.csv'
## 332 -- 36 columns 37 columns 'injurydata.csv'
## ... ... .......... .......... ................
## See problems(...) for more details.
#cleaning_employer
data$EMPLOYER =gsub(".*usps|us postal|united states postal|u.s. postal|u.s postal|u. s postal|u. s. postal.*","US_Postal_Service", ignore.case = TRUE, data$EMPLOYER)
data$EMPLOYER =gsub(".*US_Postal_Service.*","USPS", ignore.case = TRUE, data$EMPLOYER)
data$EMPLOYER =gsub(".*united parcel|ups |ups,.*","United_Parcel_Service", ignore.case = TRUE, data$EMPLOYER)
data$EMPLOYER =gsub(".*United_Parcel_Service.*","UPS", ignore.case = TRUE, data$EMPLOYER)
data$EMPLOYER =gsub(".*american airl.*","American Airlines", ignore.case = TRUE, data$EMPLOYER)
data$EMPLOYER =gsub(".*AT &|AT&.*","AT_T", ignore.case = TRUE, data$EMPLOYER)
data$EMPLOYER =gsub(".*AT_T.*","AT&T Inc", ignore.case = TRUE, data$EMPLOYER)
data$EMPLOYER =gsub(".*walmart|wallmart|wal-mart.*","wal_mart", ignore.case = TRUE, data$EMPLOYER)
data$EMPLOYER =gsub(".*wal_mart.*","Walmart", ignore.case = TRUE, data$EMPLOYER)
data$EMPLOYER =gsub(".*Publix.*","Publix_", ignore.case = TRUE, data$EMPLOYER)
data$EMPLOYER =gsub(".*Publix_.*","Publix", ignore.case = TRUE, data$EMPLOYER)
data$EMPLOYER =gsub(".*Asplundh.*","Asplundh_", ignore.case = TRUE, data$EMPLOYER)
data$EMPLOYER =gsub(".*Asplundh_.*","Asplundh", ignore.case = TRUE, data$EMPLOYER)
data$EMPLOYER =gsub(".*sodexo.*","sodexo_", ignore.case = TRUE, data$EMPLOYER)
data$EMPLOYER =gsub(".*sodexo_.*","Sodexo", ignore.case = TRUE, data$EMPLOYER)
data$EMPLOYER =gsub(".*Waste Management.*","Waste_Management", ignore.case = TRUE, data$EMPLOYER)
data$EMPLOYER =gsub(".*Waste_Management.*","Waste Management", ignore.case = TRUE, data$EMPLOYER)
data$EMPLOYER =gsub(".*Tyson Foods.*","Tyson_Foods", ignore.case = TRUE, data$EMPLOYER)
data$EMPLOYER =gsub(".*Tyson_Foods.*","Tyson Foods", ignore.case = TRUE, data$EMPLOYER)
#add SES
#a <-read_csv("ACS_17_5YR_S0101_with_ann.csv")
#b <-read_csv("ACS_17_5YR_S0701_with_ann.csv")
#c <-read_csv("ACS_17_5YR_S0801_with_ann.csv")
#d <-read_csv("ACS_17_5YR_S1101_with_ann.csv")
#e <-read_csv("ACS_17_5YR_S1901_with_ann.csv")
#f <-read_csv("ACS_17_5YR_S2301_with_ann.csv")
#a<-a %>% rename('ZIP' = 'GEO.id2')
#b<-b %>% rename('ZIP' = 'GEO.id2')
#c<-c %>% rename('ZIP' = 'GEO.id2')
#d<-d %>% rename('ZIP' = 'GEO.id2')
#e<-e %>% rename('ZIP' = 'GEO.id2')
#data <- data %>% left_join(a,by="ZIP")
#data <- data %>% left_join(b,by="ZIP")
#data <- data %>% left_join(c,by="ZIP")
#data <- data %>% left_join(d,by="ZIP")
#data <- data %>% left_join(e,by="ZIP")
#add_state population
f <-read_csv("state_pop.csv",col_names = F)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_double()
## )
f <- rename(f,STATE=X1)
f <- rename(f,POP=X2)
datax <- data %>% left_join(f, by="STATE")
#filter hospitarisation(0/1)
dataa <- datax %>% filter(HOSPITALIZED<2)
datas <- dataa %>% filter(HOSPITALIZED>-1)
#filter amputation(0/1)
datad <- datas %>% filter(AMPUTATION<2)
dataf <- datad %>% filter(AMPUTATION>-1)
#filter by hospital id
dataq <- dataf %>% filter(NEAR_FID>0)
#distance less than 10^5 meter
data <- dataq %>% filter(NEAR_DIST<10^5)
#add_multiple injury
Left<-function(x,chr){
return(substr(x,1,chr))}
data<-mutate(data, leftMul = Left(PARTOFBODYTITLE,3))
data<-mutate(data, MultiInj = ifelse(data$leftMul=="Mul", "1", "0"))
data
## # A tibble: 44,660 x 39
## XCoord YCoord OBJECTID ID UPA EVENTDATE EMPLOYER
## <dbl> <dbl> <dbl> <dbl> <dbl> <dttm> <chr>
## 1 -74.5 41.5 1 2.02e9 931176 2015-01-01 00:00:00 FCI Oti~
## 2 -89.8 43.6 2 2.02e9 930267 2015-01-01 00:00:00 Kalahar~
## 3 -80.1 40.5 3 2.02e9 929823 2015-01-01 00:00:00 Schneid~
## 4 -83.6 32.8 4 2.02e9 929711 2015-01-01 00:00:00 PEPSI B~
## 5 -89.0 42.7 5 2.02e9 929642 2015-01-01 00:00:00 North A~
## 6 -81.9 26.7 6 2.02e9 929709 2015-01-01 00:00:00 The Hom~
## 7 -82.4 28.0 7 2.02e9 932133 2015-01-01 00:00:00 Gopher ~
## 8 -105. 40.5 8 2.02e9 930028 2015-01-02 00:00:00 Foster ~
## 9 -99.4 36.4 9 2.02e9 930006 2015-01-02 00:00:00 Trinida~
## 10 -81.5 28.6 10 2.02e9 933583 2015-01-02 00:00:00 The Kry~
## # ... with 44,650 more rows, and 32 more variables: ADDRESS1 <chr>,
## # ADDRESS2 <chr>, CITY <chr>, STATE <chr>, ZIP <chr>, LATITUDE <chr>,
## # LONGITUDE <chr>, PRIMARYNAICS <dbl>, HOSPITALIZED <dbl>,
## # AMPUTATION <dbl>, INSPECTION <chr>, FINALNARRATIVE <chr>,
## # NATURE <chr>, NATURETITLE <chr>, PARTOFBODY <chr>,
## # PARTOFBODYTITLE <chr>, EVENT <chr>, EVENTTITLE <chr>, SOURCE <chr>,
## # SOURCETITLE <chr>, SECONDARYSOURCE <chr>, SECONDARYSOURCETITLE <chr>,
## # DATE <chr>, DATE_X <chr>, DATE_Y <chr>, NEAR_FID <chr>,
## # NEAR_DIST <dbl>, NEAR_X <dbl>, NEAR_Y <dbl>, POP <dbl>, leftMul <chr>,
## # MultiInj <chr>
#### 出力
##write.csv(datag, "C:/Users/keiko/Desktop/BST260-PROJECT/injurydata1130.csv", sep=";" )
data_bar <- data %>% mutate(AMPUTATION = as.factor(AMPUTATION)) %>%
group_by(AMPUTATION) %>%
summarise(mean = mean(NEAR_DIST), sd =sd(NEAR_DIST), se = sd(NEAR_DIST)/sqrt(n()))
data_bar
## # A tibble: 2 x 4
## AMPUTATION mean sd se
## <fct> <dbl> <dbl> <dbl>
## 1 0 5860. 7054. 39.0
## 2 1 6267. 6902. 63.2
data_bar %>%
ggplot(aes(x = AMPUTATION, y = mean, fill=AMPUTATION)) +
geom_bar(stat = "identity", width = 0.5)+
geom_errorbar(aes(ymin=mean - se, ymax= mean +se), width = 0.3) +
xlab("amputation")+
ylab("distance from hospital")
summary(lm(AMPUTATION ~ NEAR_DIST, data=data))
##
## Call:
## lm(formula = AMPUTATION ~ NEAR_DIST, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4142 -0.2657 -0.2605 0.7136 0.7429
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.570e-01 2.747e-03 93.585 < 2e-16 ***
## NEAR_DIST 1.620e-06 2.982e-07 5.433 5.56e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4421 on 44658 degrees of freedom
## Multiple R-squared: 0.0006606, Adjusted R-squared: 0.0006382
## F-statistic: 29.52 on 1 and 44658 DF, p-value: 5.565e-08
res <- t.test(NEAR_DIST ~ AMPUTATION, data = data)
res$p.value
## [1] 4.090862e-08
res$estimate
## mean in group 0 mean in group 1
## 5859.654 6267.409
data_amputation_rate <- data %>% group_by(STATE) %>%
summarise(mean = mean(NEAR_DIST), sd =sd(NEAR_DIST), se = sd(NEAR_DIST)/sqrt(n()),amputation_rate = mean(AMPUTATION), amputation =sum(AMPUTATION))
# manipulate data for the map
data_amputation_rate_1 <- data_amputation_rate %>%
mutate(region = str_to_lower(STATE))
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
states_map <- map_data("state")
amputation_map <- left_join(states_map, data_amputation_rate_1, by = "region")
# Create the map
library(ggthemes)
amputation_map %>%
ggplot(mapping = aes(long, lat, group = group,fill = amputation_rate))+
geom_polygon(color = "black", size = 0.1) +
coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
labs(title = "Amputation Rate") +
theme_map() +
scale_fill_gradient(low = "white", high = "#CB454A")
data_amputation_rate <- data_amputation_rate %>% filter(amputation >= 10)
data_amputation_rate %>%
ggplot(aes(x = mean, y = amputation_rate, label = STATE)) +
geom_point()+
xlab("Distance from hospital")+
ylab("Amputation rate")+
ggtitle("Amputation Rate and Distance by State")
library(openintro)
## Please visit openintro.org for free statistics materials
##
## Attaching package: 'openintro'
## The following object is masked from 'package:ggplot2':
##
## diamonds
## The following objects are masked from 'package:datasets':
##
## cars, trees
library(ggrepel)
data_amputation_rate %>%
mutate(abb = state2abbr(STATE)) %>%
ggplot(aes(x = mean, y = amputation_rate, label = abb, size = amputation)) +
geom_point()+
geom_text_repel(size =3) +
xlab("Distance from hospital")+
ylab("Amputation rate") +
xlim(0,15000) +
ggtitle("Amputation Rate and Distance by State")
data_amputation_rate %>% filter(amputation >= 10) %>%
ggplot(aes(x = mean, y = amputation_rate, label = STATE)) +
geom_point()+
geom_smooth(method="lm") +
xlab("Distance from hospital")+
ylab("Amputation rate") +
xlim(0,15000) +
ggtitle("Amputation Rate and Distance by State")
cor.test(data_amputation_rate$mean, data_amputation_rate$amputation_rate)
##
## Pearson's product-moment correlation
##
## data: data_amputation_rate$mean and data_amputation_rate$amputation_rate
## t = 0.78394, df = 33, p-value = 0.4387
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2073782 0.4482609
## sample estimates:
## cor
## 0.1352126
library(maps)
library(ggthemes)
us_map <- ggplot()+
geom_polygon(data = states_map, aes(long, lat, group = group), color = "black", size = 0.1, fill = "white") +
coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
theme_map()
us_map
data_map <- data %>% mutate(LONGITUDE = as.numeric(LONGITUDE), LATITUDE =as.numeric(LATITUDE), AMPUTATION = as.factor(AMPUTATION)) %>% filter(LATITUDE > 20 & LATITUDE < 50 & LONGITUDE > -120)
injury_map <- us_map + geom_point(data = data_map, aes(x=LONGITUDE, y=LATITUDE, color = AMPUTATION), size=0.0001) +
ggtitle("Injury and Amputation Map")
injury_map
hosp_data <- read_csv("HPDATA.csv") %>% mutate(long= as.numeric(LONGI), lat= as.numeric(LATIT)) %>%
filter(lat> 20 & lat< 50 & long > -120)
## Warning: Missing column names filled in: 'X7' [7]
## Parsed with column specification:
## cols(
## OBJECTID = col_double(),
## NAME = col_character(),
## XCoord = col_double(),
## YCoord = col_double(),
## LATIT = col_character(),
## LONGI = col_double(),
## X7 = col_double()
## )
## Warning: NAs introduced by coercion
injury_map + geom_point(data = hosp_data, aes(x=long, y=lat), color = "black", size = 0.1, alpha = 0.5) +
ggtitle("Injury and Hospital Map")
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
data2 <- data %>% mutate(month = month(EVENTDATE), year = year(EVENTDATE))
timeseries <- data2%>% group_by(year,month) %>%
summarise(amputation =sum(AMPUTATION)) %>% inner_join(data2, by=c('year'='year','month'='month'))
timeseries <- data2 %>% group_by(year, month) %>%
mutate(amputation =sum(AMPUTATION))
timeseries %>% ggplot()+aes(EVENTDATE,amputation) +geom_line()+
xlab('Date')+ylab('Number of Amputaions (people)')
timeseries %>% ggplot()+aes(EVENTDATE,amputation) +geom_smooth()+
xlab('Date')+ylab('Number of Amputaions (people)')
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
injury_state <- data %>% group_by(STATE) %>% summarise(number = n())
injury_state_pop <- transform(injury_state, population = c(4898246,735720,7275070,3026412,39747267,5770545,3567871,975033,711571,29087070,10627767,162742,1416589,1790182,12700381,6718616,3167997,2910931,4484047,4652581,1342097,6062917,6939373,10020472,5655925,2987895,6147861,1074532,1940919,3087025,1363852,8922547,2096034,19491339,10497741,760900,55144,11718568,3948950,4245901,12813969,3113659,1056738,5147111,892631,6833793,29087070,3221610,627180,106405,8571946,7666343,1791951,5832661,572381))
ggplot(data = injury_state_pop, aes(x = population, y = number, label = STATE)) +
labs(title = "Relationship betweem population and the number of injury according to states ", x = "Population", y = "Number of injury") + geom_point()
pie_chart_hosp <- data %>% select(HOSPITALIZED)
pie_chart_hosp1 <- data %>% select(HOSPITALIZED)
table(pie_chart_hosp)
## pie_chart_hosp
## 0 1
## 8810 35850
pie_chart_hosp2 <- c(8881,36041)
pie(pie_chart_hosp2, radius=1, labels=c("Non-Hospitalzed 19.8%","Hospitalized 80.2%"), col=c("#0000FF","#FF8000"), main="Hospitalization Rate in All Injuries")
pie_chart_amp1 <- data %>% select(AMPUTATION)
table(pie_chart_amp1)
## pie_chart_amp1
## 0 1
## 32749 11911
pie_chart_amp2 <- c(33173,11997)
pie(pie_chart_amp2, radius=1, labels=c("Non-Amputation 73.4%","Amputation 26.6%"), col=c("#BFFF00","#FF0000"), main="Amputation Rate in All Injuries")
library(tidytext)
library(tidyverse)
library(wordcloud2)
#NLP
text <- data %>% unnest_tokens(word, FINALNARRATIVE)
data(stop_words)
text <- text %>% anti_join(stop_words)
## Joining, by = "word"
text <-transform(text,Freq=0)
#Bar chart
text %>%
count(word, sort = TRUE) %>%
filter(n > 5000) %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(word, n)) +
geom_col() +
xlab(NULL) +
coord_flip()
#Word Cloud
library(wordcloud2)
wordcloud2(text,size=1,minSize=10)
wctext <- text %>% group_by(word) %>% mutate(Freq = n())
wctext%>% select(word,Freq)%>%
distinct(word, .keep_all = TRUE)%>%
arrange(desc(Freq)) %>%
wordcloud2()
state_hos_rate1 <- data %>% group_by(STATE) %>% filter(HOSPITALIZED==1) %>% summarise(number1 = n())
state_hos_rate2 <- data %>% group_by(STATE) %>% filter(HOSPITALIZED!=2,3,4,9) %>% summarise(number2 = n())
pie_chart_state1 <- merge(injury_state,state_hos_rate1)
pie_chart_state2 <- merge(pie_chart_state1, state_hos_rate2)
pie_chart_state3 <- mutate(pie_chart_state2, hos_rate=(number1/number2))
state_amp_rate1 <- data %>% group_by(STATE) %>% filter(AMPUTATION==1) %>% summarise(number3 = n())
state_amp_rate2 <- data %>% group_by(STATE) %>% filter(AMPUTATION!=2,3,4,9) %>% summarise(number4 = n())
pie_chart_state4 <- merge(pie_chart_state3,state_amp_rate1)
pie_chart_state5 <- merge(pie_chart_state4,state_amp_rate2)
pie_chart_state6 <- mutate(pie_chart_state5, amp_rate=(number3/number4))
ggplot(data = pie_chart_state6, aes(x = hos_rate, y = amp_rate, label = STATE)) +
labs(title = "Relationship betweem Hospitalization and Amputation Rates according to States ", x = "Hospitalization Rate", y = "Amputation Rate") + geom_point()
```r
data$HOSPITALIZED <- as.numeric(data$HOSPITALIZED)
data$MultiInj <- as.factor(data$MultiInj)
boxplot
data$AMPUTATION <- as.factor(data$AMPUTATION)
data %>%
ggplot(aes(x=AMPUTATION,y=log10(NEAR_DIST)))+
geom_boxplot()+
xlab("amputation")+
ylab("distance from hospital log10")
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
density graph
data %>% ggplot(aes(log10(NEAR_DIST), fill=AMPUTATION)) + geom_density(alpha = 0.2)+
xlab("distance to hospital, log10, meter")
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning: Removed 1 rows containing non-finite values (stat_density).
ttest
t.test(data$NEAR_DIST~data$AMPUTATION)
##
## Welch Two Sample t-test
##
## data: data$NEAR_DIST by data$AMPUTATION
## t = -5.4889, df = 21549, p-value = 4.091e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -553.3651 -262.1460
## sample estimates:
## mean in group 0 mean in group 1
## 5859.654 6267.409
####univariate Regression (distance)
mlog <- glm(AMPUTATION ~ NEAR_DIST , data = data, family = "binomial")
summary(mlog)
##
## Call:
## glm(formula = AMPUTATION ~ NEAR_DIST, family = "binomial", data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0587 -0.7858 -0.7772 1.5823 1.6475
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.060e+00 1.401e-02 -75.64 < 2e-16 ***
## NEAR_DIST 7.975e-06 1.471e-06 5.42 5.97e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 51801 on 44659 degrees of freedom
## Residual deviance: 51773 on 44658 degrees of freedom
## AIC: 51777
##
## Number of Fisher Scoring iterations: 4
OR
exp(cbind(OR = coef(mlog), confint(mlog)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.3465533 0.3371557 0.3561901
## NEAR_DIST 1.0000080 1.0000051 1.0000108
#create values
fit_1 <- with(data, data.frame(NEAR_DIST = rep(seq(from = 1, to = 100000, length.out = 1000), 2)))
#generate
fit_2 <- cbind(fit_1, predict(mlog, newdata = fit_1, type = "link",se = TRUE))
fit_2 <- within(fit_2, {
PredictedProb <- plogis(fit)
LL <- plogis(fit - (1.96 * se.fit))
UL <- plogis(fit + (1.96 * se.fit))
})
## view first few rows of final dataset
head(fit_2)
## NEAR_DIST fit se.fit residual.scale UL LL
## 1 1.0000 -1.059711 0.01400918 1 0.2626476 0.2521518
## 2 101.0991 -1.058912 0.01391461 1 0.2627663 0.2523373
## 3 201.1982 -1.058114 0.01382096 1 0.2628854 0.2525226
## 4 301.2973 -1.057316 0.01372825 1 0.2630049 0.2527077
## 5 401.3964 -1.056517 0.01363650 1 0.2631248 0.2528924
## 6 501.4955 -1.055719 0.01354573 1 0.2632451 0.2530769
## PredictedProb
## 1 0.2573648
## 2 0.2575174
## 3 0.2576700
## 4 0.2578227
## 5 0.2579755
## 6 0.2581284
nrow(fit_2)
## [1] 2000
#plot
ggplot(fit_2, aes(x = NEAR_DIST, y = PredictedProb)) + geom_ribbon(aes(ymin = LL,
ymax = UL), alpha = 0.2) + geom_line()+xlab("Distance from Hospital")+ylab("Predicted Probability of Amputation")+xlim(100,50000)
## Warning: Removed 1002 rows containing missing values (geom_path).
ggplot(fit_2, aes(x = log10(NEAR_DIST), y = PredictedProb)) + geom_ribbon(aes(ymin = LL,
ymax = UL), alpha = 0.2) + geom_line()+xlab("Distance from Hospital")+ylab("Predicted Probability of Amputation")
####multivariate logistic Regression (distance, hospitalization, multiple injury+/-)
mylogit <- glm(AMPUTATION ~ NEAR_DIST + HOSPITALIZED + MultiInj, data = data, family = "binomial")
summary(mylogit)
##
## Call:
## glm(formula = AMPUTATION ~ NEAR_DIST + HOSPITALIZED + MultiInj,
## family = "binomial", data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.3385 -0.4497 -0.4443 0.1020 3.4193
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.126e+00 1.417e-01 36.172 < 2e-16 ***
## NEAR_DIST 8.070e-06 2.477e-06 3.258 0.00112 **
## HOSPITALIZED -7.405e+00 1.421e-01 -52.096 < 2e-16 ***
## MultiInj1 -3.570e+00 2.927e-01 -12.195 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 51801 on 44659 degrees of freedom
## Residual deviance: 21378 on 44656 degrees of freedom
## AIC: 21386
##
## Number of Fisher Scoring iterations: 8
calculating Odds Ratio
exp(cbind(OR = coef(mylogit), confint(mylogit)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 1.683448e+02 1.290296e+02 2.251316e+02
## NEAR_DIST 1.000008e+00 1.000003e+00 1.000013e+00
## HOSPITALIZED 6.080158e-04 4.543011e-04 7.940252e-04
## MultiInj1 2.815471e-02 1.514230e-02 4.787999e-02